/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*#if !defined(lint)
#include "assym.h"
#endif*/	/* !lint */

/*
 * General assembly language routines.
 * It is the intent of this file to contain routines that are
 * specific to cpu architecture.
 */

/*
 * Override GET_NATIVE_TIME for the cpu module code.  This is not
 * guaranteed to be exactly one instruction, be careful of using
 * the macro in delay slots.
 *
 * Do not use any instruction that modifies condition codes as the 
 * caller may depend on these to remain unchanged across the macro.
 */
#if defined(CHEETAH) || defined(OLYMPUS_C)

/*
 * GET_NATIVE_TIME(out, scr1, scr2): read the STICK register into out.
 * This variant expands to a single rd; scr1 and scr2 are accepted so the
 * parameter list matches the other variants in this file but are not used.
 */
#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
/*
 * DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3): add delta to STICK.
 * reg is clobbered (it ends up holding the value written back).
 * scr1, scr2 and scr3 are not used by this variant.  The plain add is
 * used (not addcc), so the condition codes are left alone.
 */
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rd	STICK, reg;		\
	add	reg, delta, reg;	\
	wr	reg, STICK
/*
 * RD_TICKCMPR(out, scr): read STICK_COMPARE into out.
 * scr is not used by this variant.
 */
#define	RD_TICKCMPR(out, scr)		\
	rd	STICK_COMPARE, out
/*
 * WR_TICKCMPR(in, scr1, scr2, label): write in to STICK_COMPARE.
 * scr1, scr2 and label are not used by this variant.
 */
#define	WR_TICKCMPR(in, scr1, scr2, label) \
	wr	in, STICK_COMPARE

#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>

/*
 * the current hummingbird version of %stick and %stick_cmp
 * were both implemented as (2) 32-bit locations in ASI_IO space;
 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
 *
 * 64-bit opcodes are required, but move only 32-bits:
 *
 * ldxa [phys]ASI_IO, %dst 	reads  the low 32-bits from phys into %dst
 * stxa %src, [phys]ASI_IO 	writes the low 32-bits from %src into phys
 *
 * reg equivalent		[phys]ASI_IO
 * ------------------		---------------
 * %stick_cmp  low-32		0x1FE.0000.F060
 * %stick_cmp high-32		0x1FE.0000.F068
 * %stick      low-32		0x1FE.0000.F070
 * %stick     high-32		0x1FE.0000.F078
 */
/*
 * Low-order address byte of each 32-bit location in the table above.
 * SETL41() ORs one of these into the 0x1FE.0000.F000 base address.
 * HST_DIFF is the distance from a low-32 location to its high-32 partner
 * and is applied with inc/dec by the macros below.
 */
#define	HSTC_LOW	0x60			/* stick_cmp low  32-bits */
#define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
#define	HST_LOW		0x70			/* stick low  32-bits */
#define	HST_HIGH	0x78			/* stick high 32-bits */
#define	HST_DIFF	0x08			/* low<-->high diff */

/*
 * SETL41(reg, byte): load reg with the physical address 0x1FE.0000.F0xx,
 * where xx is byte.  Only reg is written.  The expansion is exactly four
 * instructions and already ends with a statement separator.
 *
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */

/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to go over the SETL41() macro.
 * It is the size in bytes of the SETL41() expansion:
 * 4 instructions of 4 bytes each.
 */
#define SETL41_OFF  16

/*
 * reading stick requires 2 loads, and there could be an intervening
 * low-to-high 32-bit rollover resulting in a return value that is
 * off by about (2 ^ 32); this rare case is detected by re-reading
 * the low-32 bits after the high-32 and verifying the "after" value
 * is >= the "before" value; if not, the reads are repeated.
 *
 * this method is limited to 1 rollover, and based on the fixed
 * stick-frequency (5555555), requires the loads to complete within
 * 773 seconds; incrementing the high-32 value will not overflow for
 * about 52644 years.
 *
 * writing stick requires 2 stores; if the old/new low-32 value is
 * near 0xffffffff, there could be another rollover (also rare).
 * to prevent this, we first write a 0 to the low-32, then write
 * new values to the high-32 then the low-32.
 *
 * When we detect a carry in the lower %stick register, we need to
 * read HST_HIGH again. However, at the point where we detect this,
 * we need to rebuild the register address HST_HIGH. This involves more
 * than one instruction and a branch is unavoidable. However, most of
 * the time, there is no carry. So we take the penalty of a branch
 * instruction only when there is a carry (less frequent).
 * 
 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 * addr already points to HST_LOW.
 *
 * NOTE: this method requires disabling interrupts before using
 * DELTA_NATIVE_TIME.
 */
/*
 * GET_NATIVE_TIME(out, scr, tmp): out = 64-bit stick value assembled from
 * its two 32-bit halves.  scr and tmp are clobbered.  If the second low-32
 * read is smaller than the first, the whole sequence restarts at SETL41();
 * the branch offset is the 6 instructions (24 bytes) between SETL41() and
 * the branch, plus SETL41_OFF.
 */
#define	GET_NATIVE_TIME(out, scr, tmp)	\
	SETL41(scr, HST_LOW);		/* scr = address of stick low-32 */ \
	ldxa	[scr]ASI_IO, tmp;	/* tmp = low-32, first read */ \
	inc	HST_DIFF, scr;		/* scr = address of stick high-32 */ \
	ldxa	[scr]ASI_IO, out;	/* out = high-32 */ \
	dec	HST_DIFF, scr;		/* scr = address of stick low-32 */ \
	ldxa	[scr]ASI_IO, scr;	/* scr = low-32, second read */ \
	sub	scr, tmp, tmp;		/* tmp = second read - first read */ \
	brlz,pn tmp, .-(SETL41_OFF+24); /* negative: retry from SETL41() */ \
	sllx	out, 32, out;		/* delay: high-32 into bits 63:32 */ \
	or	out, scr, out		/* out = (high << 32) | low */
/*
 * DELTA_NATIVE_TIME(delta, addr, high, low, tmp): add delta to the 64-bit
 * stick value.  addr, high, low and tmp are all clobbered.
 *
 * The read half mirrors GET_NATIVE_TIME, except that a retry branches back
 * only 24 bytes, to the first ldxa, because addr already holds the HST_LOW
 * address at that point.  The write half zeroes the low-32 location before
 * storing the new high-32 and then the new low-32 value.
 */
#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
	SETL41(addr, HST_LOW);		/* addr = address of stick low-32 */ \
	ldxa	[addr]ASI_IO, tmp;	/* tmp = low-32, first read */ \
	inc	HST_DIFF, addr;		/* addr = address of stick high-32 */ \
	ldxa	[addr]ASI_IO, high;	/* high = high-32 */ \
	dec	HST_DIFF, addr;		/* addr = address of stick low-32 */ \
	ldxa	[addr]ASI_IO, low;	/* low = low-32, second read */ \
	sub	low, tmp, tmp;		/* tmp = second read - first read */ \
	brlz,pn tmp, .-24;		/* negative: retry from first ldxa */ \
	sllx	high, 32, high;		/* delay: high-32 into bits 63:32 */ \
	or	high, low, high;	/* high = current 64-bit stick */ \
	add	high, delta, high;	/* high = new 64-bit stick */ \
	srl	high, 0, low;		/* low = new value, bits 31:0 */ \
	srlx	high, 32, high;		/* high = new value, bits 63:32 */ \
	stxa	%g0, [addr]ASI_IO;	/* zero low-32 before updating */ \
	inc	HST_DIFF, addr;		/* addr = address of stick high-32 */ \
	stxa	high, [addr]ASI_IO;	/* store new high-32 */ \
	dec	HST_DIFF, addr;		/* addr = address of stick low-32 */ \
	stxa	low, [addr]ASI_IO
/*
 * RD_TICKCMPR(out, scr): out = 64-bit stick_cmp value assembled from its
 * two 32-bit halves.  scr is clobbered.  Unlike GET_NATIVE_TIME, the two
 * loads are not checked against each other.
 */
#define RD_TICKCMPR(out, scr)		\
	SETL41(scr, HSTC_LOW);		/* scr = address of stick_cmp low-32 */ \
	ldxa	[scr]ASI_IO, out;	/* out = low-32 */ \
	inc	HST_DIFF, scr;		/* scr = address of stick_cmp high-32 */ \
	ldxa	[scr]ASI_IO, scr;	/* scr = high-32 */ \
	sllx	scr, 32, scr;		/* high-32 into bits 63:32 */ \
	or	scr, out, out		/* out = (high << 32) | low */
/*
 * WR_TICKCMPR(in, scra, scrd, label): write the 64-bit value in to
 * stick_cmp, high-32 location first and low-32 location second.
 * scra (address) and scrd (data) are clobbered; in is preserved.
 * label is not used by this variant.
 */
#define WR_TICKCMPR(in, scra, scrd, label) \
	SETL41(scra, HSTC_HIGH);	/* scra = address of stick_cmp high-32 */ \
	srlx	in, 32, scrd;		/* scrd = bits 63:32 of in */ \
	stxa	scrd, [scra]ASI_IO;	/* store high-32 */ \
	dec	HST_DIFF, scra;		/* scra = address of stick_cmp low-32 */ \
	stxa	in, [scra]ASI_IO

#else	/* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */

/*
 * GET_NATIVE_TIME(out, scr1, scr2): read the privileged %tick register
 * into out.  This variant expands to a single rdpr; scr1 and scr2 are
 * not used.
 */
#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rdpr	%tick, out
/*
 * DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3): add delta to %tick.
 * reg is clobbered (it ends up holding the value written back).
 * scr1, scr2 and scr3 are not used by this variant.  The plain add is
 * used (not addcc), so the condition codes are left alone.
 */
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rdpr	%tick, reg;		\
	add	reg, delta, reg;	\
	wrpr	reg, %tick
/*
 * RD_TICKCMPR(out, scr): read TICK_COMPARE into out.
 * scr is not used by this variant.
 */
#define	RD_TICKCMPR(out, scr)		\
	rd	TICK_COMPARE, out
#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
/*
 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 * The failure occurs only when the following instruction decodes to wr or
 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
 * with a read, thus stalling the pipe and keeping following instructions
 * from causing data corruption.  Aligning to a quadword will ensure these
 * two instructions are not split due to i$ misses.
 */
/*
 * WR_TICKCMPR(cmpr, scr1, scr2, label): write cmpr to TICK_COMPARE and
 * immediately read TICK_COMPARE back into %g0 (the value is discarded).
 * The ba,a skips the .align padding and lands on the aligned wr.
 * label is substituted into the symbol .bb_errata_1.<label> that marks
 * the aligned wr.
 * NOTE(review): callers presumably pass a distinct label at every use so
 * the generated symbols do not collide -- confirm against the call sites.
 * scr1 and scr2 are not used.
 */
#define WR_TICKCMPR(cmpr,scr1,scr2,label)	\
	ba,a	.bb_errata_1.label		;\
	.align	64				;\
.bb_errata_1.label:				;\
	wr	cmpr, TICK_COMPARE		;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
/*
 * WR_TICKCMPR(in, scr1, scr2, label): write in to TICK_COMPARE.
 * scr1, scr2 and label are not used by this variant.
 */
#define	WR_TICKCMPR(in,scr1,scr2,label)		\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */

#endif	/* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */

#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machthread.h>
#include <sys/intreg.h>

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

#if !defined(lint) && !defined(__lint)

	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	/*
	 * kstat_q_panic: branch target used by the queue-exit updates in
	 * this file when the old queue length is zero.  Opens a new register
	 * window with a minimum-size frame and calls panic() with
	 * kstat_q_panic_msg as its first argument.  Nothing follows the
	 * call; control is not expected to come back.
	 */
	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp		/* new window and frame */
	set	kstat_q_panic_msg, %o0		/* %o0 = message address */
	call	panic
	nop					/* delay slot */
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)

/*
 * Branch-if-register-zero mnemonics passed as the QBR argument of
 * KSTAT_Q_UPDATE: predicted not-taken (BRZPN) and predicted taken (BRZPT).
 * Neither sets the annul bit, so the delay-slot instruction always runs.
 */
#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE): update one queue's
 * counters in the structure addressed by %o0.
 *
 *   QOP     - add or sub; computes the new queue length (old +/- 1)
 *   QBR     - branch-on-zero mnemonic (BRZPN or BRZPT), tested on the
 *             OLD queue length
 *   QZERO   - branch target when the old queue length is zero
 *   QRETURN - text placed immediately before the final lastupdate store;
 *             callers in this file pass "retl", "1:retl" or "1:", so with
 *             retl that store sits in the return delay slot
 *   QTYPE   - field-offset prefix; QTYPE is glued to CNT, LASTUPDATE, TIME
 *             and LENTIME through the empty comment (relies on the
 *             preprocessor deleting the comment without inserting a space)
 *
 * Inputs:   %o0 = structure base, %g1 = current time.
 * Clobbers: %o1 - %o5.
 *
 * The store of the new queue length is in the QBR delay slot, so it
 * happens whether or not the branch is taken.  When the branch is not
 * taken, time grows by the delta since lastupdate and lentime grows by
 * (old queue length * delta).
 * NOTE(review): the offsets presumably come from assym.h and describe
 * the kstat I/O structure -- confirm.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */

	/*
	 * kstat_waitq_enter: add one to the KSTAT_IO_W queue length of the
	 * structure addressed by %o0.  The current time is read into %g1
	 * (%g2 and %g3 are handed to the macro as scratch).  If the old
	 * length was zero, the time/lentime accumulation is skipped and
	 * only lastupdate is refreshed.  Returns with retl; the lastupdate
	 * store is in its delay slot.  Uses %g1-%g3 and %o1-%o5.
	 */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	/*
	 * kstat_waitq_exit: subtract one from the KSTAT_IO_W queue length of
	 * the structure addressed by %o0 and accumulate time/lentime since
	 * lastupdate.  If the old length was already zero, control branches
	 * to kstat_q_panic (the decremented length is still stored, because
	 * that store is in the branch delay slot).  Returns with retl; the
	 * lastupdate store is in its delay slot.  Uses %g1-%g3 and %o1-%o5.
	 */
	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)

	/*
	 * kstat_runq_enter: add one to the KSTAT_IO_R queue length of the
	 * structure addressed by %o0.  The current time is read into %g1
	 * (%g2 and %g3 are handed to the macro as scratch).  If the old
	 * length was zero, the time/lentime accumulation is skipped and
	 * only lastupdate is refreshed.  Returns with retl; the lastupdate
	 * store is in its delay slot.  Uses %g1-%g3 and %o1-%o5.
	 */
	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	/*
	 * kstat_runq_exit: subtract one from the KSTAT_IO_R queue length of
	 * the structure addressed by %o0 and accumulate time/lentime since
	 * lastupdate.  If the old length was already zero, control branches
	 * to kstat_q_panic (the decremented length is still stored, because
	 * that store is in the branch delay slot).  Returns with retl; the
	 * lastupdate store is in its delay slot.  Uses %g1-%g3 and %o1-%o5.
	 */
	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)

	/*
	 * kstat_waitq_to_runq: using a single time reading in %g1, take one
	 * entry off the KSTAT_IO_W queue and then add one to the KSTAT_IO_R
	 * queue of the structure addressed by %o0.  The first update passes
	 * a bare "1:" as QRETURN, so after its lastupdate store execution
	 * falls through into the second update, which returns with retl.
	 * A zero KSTAT_IO_W length branches to kstat_q_panic.
	 * Uses %g1-%g3 and %o1-%o5.
	 */
	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)

	/*
	 * kstat_runq_back_to_waitq: using a single time reading in %g1, take
	 * one entry off the KSTAT_IO_R queue and then add one to the
	 * KSTAT_IO_W queue of the structure addressed by %o0.  The first
	 * update passes a bare "1:" as QRETURN, so after its lastupdate
	 * store execution falls through into the second update, which
	 * returns with retl.  A zero KSTAT_IO_R length branches to
	 * kstat_q_panic.  Uses %g1-%g3 and %o1-%o5.
	 */
	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)

#endif	/* !(lint || __lint) */
